# install.packages("psych", repos = "http://personality-project.org/r", type = "source")
# install.packages("psychTools", repos = "http://personality-project.org/r", type = "source")
# Load the relevant libraries --------------------------
library(psych)
library(psychTools)
library(tidyverse)
library(janitor)
library(readxl)
library(ggpubr)
library(kableExtra)
# Make sure you're running the most recent version of psych
# sessionInfo()# Load the functions and data --------------------------
`%nin%` <- Negate(`%in%`)
## Load in the score data
dryRun_ScoreExportNarrow <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_ScoreExportNarrow_20230323.csv") %>%
janitor::clean_names()
# dim(dryRun_ScoreExportNarrow)
# colnames(dryRun_ScoreExportNarrow)
## Load in the item data
dryRun_ItemExportNarrow <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_ItemExportNarrow_20230323.csv") %>%
janitor::clean_names()
# dim(dryRun_ItemExportNarrow)
# colnames(dryRun_ItemExportNarrow)
## Load in the age data
dryRun_Registration_Age <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_Registration_Age_20230323.csv") %>%
janitor::clean_names()
# dim(dryRun_Registration_Age)
# colnames(dryRun_Registration_Age)
## Load in the DP4 data
dryRun_dp4 <- read_csv("/Volumes/fsmresfiles/MSS/Research/Projects/Baby_Toolbox/Data/Combined_Dry_Run_Data/dryRun_DP4_20230323.csv") %>%
janitor::clean_names()
# dim(dryRun_dp4)
# colnames(dryRun_dp4)
# These are the IDs we care about for analysis
ids_for_analysis <- read_csv("data/2023-03-22T172307_shouldHavecorrected2.csv") %>%
rename(PIN = PINsago)
# dim(ids_for_analysis)
analysis_ids <- ids_for_analysis %>%
pull(PIN)item_wide <- dryRun_ItemExportNarrow %>%
filter(key == "Score") %>%
pivot_wider(id_cols = c(pin, instrument_title),
names_from = item_id,
values_from = value) %>%
type_convert()scores_long_df <- dryRun_ScoreExportNarrow %>%
filter(pin %in% analysis_ids)
scores_long_age_df <- full_join(scores_long_df, dryRun_Registration_Age,
by = c("pin", "pid", "registration_id", "assessment_name"),
multiple = "all")pull_instrument <- item_wide %>%
count(instrument_title) %>%
pull(instrument_title)
# Create a list that to save the variables into it
describe_df <- data.frame(instrument_title = NA,
item_id = NA,
n = NA,
mean = NA,
sd = NA,
min = NA,
max = NA)
# Determine if variables are poly or tetrachoric
for(i in 1:length(pull_instrument)) {
df <- item_wide %>%
filter(instrument_title %in% paste(pull_instrument[i])) %>%
select(-c(pin))
df <- df[,colSums(is.na(df))<nrow(df)]
temp_describe <- describe(df, skew = FALSE) %>%
data.frame() %>%
select(-c(vars, range, se)) %>%
mutate(instrument_title = paste(pull_instrument[i]),
item_id = rownames(describe(df, skew = FALSE))) %>%
select(item_id, everything())
row.names(temp_describe) <- NULL
describe_df <- bind_rows(describe_df, temp_describe)
}
describe_df <- describe_df %>%
filter(!is.na(instrument_title))
pull_poly <- describe_df %>%
filter(max > 1) %>%
count(instrument_title) %>%
# Verbal Counting only contains 1 scored item and therefore can't get a correlation
filter(instrument_title != "Verbal Counting") %>%
pull(instrument_title)
pull_tetra <- describe_df %>%
filter(instrument_title %nin% pull_poly) %>%
filter(max == 1) %>%
filter(min == 0) %>%
count(instrument_title) %>%
pull(instrument_title)### Do all the polychoric measures ----
for(i in 1:length(pull_poly)) {
df <- item_wide %>%
filter(instrument_title %in% paste(pull_poly[i])) %>%
select(-c(pin, instrument_title))
df <- df[,colSums(is.na(df))<nrow(df)]
cor_matrix <- df %>%
cor(., use = "pairwise", method = "spearman") %>%
round(., digits = 3)
cor_matrix %>%
cor.plot(., xlas = 3, main = paste(pull_poly[i]))
#print(cor_matrix) # This needs to be prettier
}### Do the tetrachoric matrices ----
for(i in 1:length(pull_tetra)) {
df <- item_wide %>%
filter(instrument_title %in% paste(pull_tetra[i])) %>%
select(-c(pin, instrument_title))
df <- df[,colSums(is.na(df))<nrow(df)]
cor_matrix_all <- df %>%
tetrachoric(., delete = FALSE, correct = FALSE)
cor_matrix <- round(cor_matrix_all$rho, digits = 3)
cor_matrix %>%
cor.plot(., xlas = 3, main = paste(pull_tetra[i]))
#print(cor_matrix) # This needs to be prettier
}
# Assessment Times ----
## Code what the assessment was
pull_parent_assessments <- scores_long_df %>%
select(test_name) %>%
filter(str_detect(test_name, "CBQ") |
str_detect(test_name, "CDI") |
str_detect(test_name, "Caregiver") |
str_detect(test_name, "IBQ") |
str_detect(test_name, "PROMIS")) %>%
unique() %>%
pull(test_name)
# Create a DF that will allow us to know the type of assessment
child_parent_assessment <- scores_long_df %>%
select(pin, registration_id, test_name, instrument_title) %>%
unique() %>%
mutate(type = ifelse(test_name %in% pull_parent_assessments, "Parent",
"Child")) %>%
select(pin, registration_id, type) %>%
unique()
# Figure out the 16-21 battery
touch_gaze_df <- dryRun_ItemExportNarrow %>%
filter(pin %in% analysis_ids) %>%
filter(key == "Score") %>%
select(pin, registration_id, instrument_title) %>%
unique() %>%
full_join(., child_parent_assessment, by = c("pin", "registration_id")) %>%
filter(type == "Child") %>%
select(-c(type)) %>%
filter(instrument_title %in% c("Executive Function", "NBT Touch Screen Tutorial",
"Memory Task Learning", "Memory Task Test")) %>%
filter(!is.na(instrument_title)) %>%
arrange(pin) %>%
mutate(battery = ifelse(instrument_title == "Executive Function", "Gaze",
ifelse(instrument_title == "NBT Touch Screen Tutorial", "Touch",
ifelse(instrument_title == "Memory Task Learning", "Touch",
ifelse(instrument_title == "Memory Task Test", "Touch",
NA))))) %>%
select(-c(instrument_title)) %>%
unique()
# Find the assessment times of each battery
all_battery_times <- dryRun_ItemExportNarrow %>%
filter(pin %in% analysis_ids) %>%
filter(key == "DateCreated") %>%
group_by(pin, registration_id) %>%
mutate(min_time = min(value),
max_time = max(value)) %>%
select(pin, registration_id, min_time, max_time) %>%
unique() %>%
mutate(min_time = lubridate::as_datetime(min_time),
max_time = lubridate::as_datetime(max_time),
diff = difftime(max_time, min_time, units = "mins")) %>%
ungroup() %>%
mutate(diff_min = str_remove(diff, " min"),
diff_min = as.numeric(diff_min)) %>%
full_join(., dryRun_Registration_Age,
by = c("pin", "registration_id"),
multiple = "all") %>%
select(pin, registration_id, total_age_in_months, min_time, max_time, diff_min) %>%
full_join(., child_parent_assessment, by = c("pin", "registration_id")) %>%
full_join(., touch_gaze_df, by = c("pin", "registration_id")) %>%
mutate(parent_battery = ifelse(between(total_age_in_months, 3,5), "3-5 Month",
ifelse(total_age_in_months == 6, "6 Month",
ifelse(between(total_age_in_months, 7,8), "7-8 Month",
ifelse(between(total_age_in_months, 9,12), "9-12 Month",
ifelse(between(total_age_in_months, 13,18), "13-18 Month",
ifelse(between(total_age_in_months, 19,30), "19-30 Month",
ifelse(between(total_age_in_months, 31,36), "31-36 Month",
ifelse(total_age_in_months >= 37, "37+ Month", NA)))))))),
parent_battery = ifelse(type == "Parent", parent_battery, NA),
parent_battery = as.factor(parent_battery),
parent_battery = fct_relevel(parent_battery, "3-5 Month","6 Month",
"7-8 Month", "9-12 Month", "13-18 Month",
"19-30 Month", "31-36 Month","37+ Month"),
child_battery = ifelse(between(total_age_in_months, 1,5), "1-5 Month",
ifelse(between(total_age_in_months, 6,8), "6-8 Month",
ifelse(between(total_age_in_months, 9,15), "9-21 Month",
ifelse(battery == "Gaze" & between(total_age_in_months, 16,21), "9-21 Month",
ifelse(battery == "Touch" & between(total_age_in_months, 16,21), "22-24 Month",
ifelse(between(total_age_in_months, 22,24), "22-24 Month",
ifelse(between(total_age_in_months, 25,36), "25-36 Month",
ifelse(total_age_in_months >= 37, "37+ Month", NA)))))))),
child_battery = ifelse(type == "Child", child_battery, NA),
child_battery = as.factor(child_battery),
child_battery = fct_relevel(child_battery, "1-5 Month", "6-8 Month", "9-21 Month",
"22-24 Month", "25-36 Month", "37+ Month")
) %>%
arrange(pin) %>%
select(-c(battery))
battery_times_child <- all_battery_times %>%
filter(type == "Child") %>%
select(-c(type))
battery_times_parent <- all_battery_times %>%
filter(type == "Parent") %>%
select(-c(type))all_battery_times %>%
ggplot(aes(x = diff_min)) +
geom_histogram() +
facet_wrap(~type)## Child Battery -----
battery_times_child %>%
ggplot(aes(x = diff_min)) +
geom_histogram() +
facet_wrap(~child_battery) +
labs(title = "Distribution of Timing on Child Batteries",
x = "Time on Battery (min)",
y = "Count")child_battery_times <- describeBy(battery_times_child$diff_min, group = battery_times_child$child_battery,
mat = TRUE, skew = FALSE) %>%
select(-c(item, vars))
rownames(child_battery_times) <- NULL
child_battery_times %>%
kbl(caption = "Descriptive Statistics of Child Batteries") %>%
kable_styling()| group1 | n | mean | sd | min | max | range | se |
|---|---|---|---|---|---|---|---|
| 1-5 Month | 6 | 27.81944 | 10.063368 | 14.96667 | 43.60000 | 28.63333 | 4.108353 |
| 6-8 Month | 7 | 44.53333 | 4.414968 | 38.83333 | 49.80000 | 10.96667 | 1.668701 |
| 9-21 Month | 28 | 66.00060 | 14.446816 | 45.38333 | 100.01667 | 54.63333 | 2.730192 |
| 22-24 Month | 8 | 68.18125 | 14.093983 | 50.31667 | 91.23333 | 40.91667 | 4.982975 |
| 25-36 Month | 20 | 59.02333 | 6.623665 | 47.68333 | 69.53333 | 21.85000 | 1.481097 |
| 37+ Month | 14 | 72.24881 | 9.510726 | 58.81667 | 90.50000 | 31.68333 | 2.541848 |
## Parent Battery -----
battery_times_parent %>%
ggplot(aes(x = diff_min)) +
geom_histogram() +
facet_wrap(~parent_battery) +
labs(title = "Distribution of Timing on Parent Batteries",
x = "Time on Battery (min)",
y = "Count")parent_battery_times <- describeBy(battery_times_parent$diff_min, group = battery_times_parent$parent_battery,
mat = TRUE, skew = FALSE) %>%
select(-c(item, vars))
rownames(parent_battery_times) <- NULL
parent_battery_times %>%
kbl(caption = "Descriptive Statistics of Parent Batteries") %>%
kable_styling()| group1 | n | mean | sd | min | max | range | se |
|---|---|---|---|---|---|---|---|
| 3-5 Month | 5 | 7.68000 | 5.565362 | 4.116667 | 17.46667 | 13.35000 | 2.488905 |
| 6 Month | 2 | 15.20000 | 9.805214 | 8.266667 | 22.13333 | 13.86667 | 6.933333 |
| 7-8 Month | 5 | 14.31667 | 7.732651 | 7.050000 | 24.78333 | 17.73333 | 3.458147 |
| 9-12 Month | 10 | 19.00333 | 12.423983 | 4.950000 | 40.10000 | 35.15000 | 3.928808 |
| 13-18 Month | 11 | 27.54848 | 16.862303 | 8.233333 | 55.86667 | 47.63333 | 5.084175 |
| 19-30 Month | 22 | 16.18182 | 9.255626 | 4.466667 | 47.15000 | 42.68333 | 1.973306 |
| 31-36 Month | 10 | 15.31333 | 7.160436 | 4.733333 | 25.80000 | 21.06667 | 2.264329 |
| 37+ Month | 14 | 14.05000 | 4.575067 | 9.016667 | 27.01667 | 18.00000 | 1.222738 |